#!/bin/bash

# 洗护系统监控脚本

# Abort the script as soon as an unguarded command exits with a non-zero status.
set -o errexit

# Settings
LOG_FILE="logs/monitor.log"          # file that log() appends every message to
CHECK_INTERVAL=30                    # seconds handed to sleep between monitoring rounds
BACKEND_URL="http://localhost:8080"  # prefix of every backend API request

# Make sure the directory that holds the log file exists
mkdir -p "${LOG_FILE%/*}"

# Write one timestamped line to stdout and append the same line to the log file.
# Globals:   LOG_FILE (read) - path of the file the line is appended to
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

# Probe an HTTP endpoint and log whether it answered successfully.
# Globals:   CURL_MAX_TIME (read, optional) - overall time limit in seconds
#            for the request; defaults to 10
# Arguments: $1 - display name used in the log line
#            $2 - URL to request
# Returns:   0 when curl succeeds, 1 otherwise
check_service() {
    local service_name="$1"
    local url="$2"

    # -f makes curl exit non-zero on HTTP error statuses. --max-time bounds the
    # whole transfer so a backend that accepts the connection but never answers
    # is reported as unhealthy instead of blocking the monitor indefinitely.
    if curl -f -s --max-time "${CURL_MAX_TIME:-10}" "$url" > /dev/null 2>&1; then
        log "✅ $service_name 正常"
        return 0
    fi

    log "❌ $service_name 异常"
    return 1
}

# Check that every expected Docker container shows up as running.
# A container counts as running when its name appears in the output of
# `docker ps` filtered by that name and by status=running (substring match).
# Returns: 0 when all listed containers are running, 1 when at least one is not
check_containers() {
    log "🐳 检查Docker容器状态..."

    local containers=("laundry-mysql" "laundry-redis" "laundry-backend" "laundry-nginx")
    # Numeric status: `return` only accepts integers, so a true/false string
    # cannot be handed to it.
    local unhealthy=0
    local container

    for container in "${containers[@]}"; do
        if docker ps --filter "name=$container" --filter "status=running" | grep -qF -- "$container"; then
            log "✅ 容器 $container 运行正常"
        else
            log "❌ 容器 $container 未运行"
            unhealthy=1
        fi
    done

    return "$unhealthy"
}

# Log CPU, memory and root-filesystem usage, and warn when CPU or memory
# usage is above 80%.
#   CPU    - 100 minus the idle percentage on top's "Cpu(s)" summary line
#   memory - used / total from the "Mem" row of free, as a percentage
#   disk   - fifth column of the df row for /
# A value that cannot be read is logged empty and its threshold check skipped.
# Returns: 0
check_resources() {
    log "💻 检查系统资源..."

    local -r threshold=80
    local -r number_re='^[0-9]+([.][0-9]+)?$'
    local cpu_usage mem_usage disk_usage

    # LC_ALL=C pins the untranslated "Cpu(s)" / "Mem" labels and '.' as the
    # decimal separator. Splitting on commas and looking for the "id" field
    # works for both "95.8 id" and "95.8%id", and also when top prints
    # "%Cpu(s):100.0 us" with no space after the colon.
    cpu_usage=$(LC_ALL=C top -bn1 | LC_ALL=C awk -F',' '
        /Cpu\(s\)/ {
            for (i = 1; i <= NF; i++) {
                if ($i ~ /id/) {
                    gsub(/[^0-9.]/, "", $i)
                    printf "%.1f", 100 - $i
                    exit
                }
            }
        }') || cpu_usage=""
    log "📊 CPU使用率: ${cpu_usage}%"

    # Memory usage; the $2 > 0 guard avoids a division by zero
    mem_usage=$(LC_ALL=C free | LC_ALL=C awk '
        /Mem/ && $2 > 0 { printf "%.1f", $3 / $2 * 100.0; exit }') || mem_usage=""
    log "📊 内存使用率: ${mem_usage}%"

    # Disk usage; -P keeps each filesystem on a single output line so that
    # the second line always carries the usage column
    disk_usage=$(df -hP / | awk 'NR==2 { print $5 }') || disk_usage=""
    log "📊 磁盘使用率: $disk_usage"

    # Threshold checks: compare only values that parsed as plain numbers
    if [[ "$cpu_usage" =~ $number_re ]] &&
        LC_ALL=C awk -v v="$cpu_usage" -v t="$threshold" 'BEGIN { exit !(v + 0 > t + 0) }'; then
        log "⚠️ CPU使用率过高: ${cpu_usage}%"
    fi

    if [[ "$mem_usage" =~ $number_re ]] &&
        LC_ALL=C awk -v v="$mem_usage" -v t="$threshold" 'BEGIN { exit !(v + 0 > t + 0) }'; then
        log "⚠️ 内存使用率过高: ${mem_usage}%"
    fi

    return 0
}

# Ask the backend's db-info endpoint whether the database is reachable and
# log the user and category counts found in the response.
# Globals:   BACKEND_URL (read)
#            CURL_MAX_TIME (read, optional) - overall time limit in seconds
#            for the request; defaults to 10
# Returns:   0 when the response body contains "success": true, 1 otherwise
check_database() {
    log "🗄️ 检查数据库连接..."

    # Optional whitespace around the colon, so both compact JSON
    # ("success":true) and pretty-printed JSON ("success" : true) match.
    local -r ws='[[:space:]]*'
    local -r ok_re="\"success\"${ws}:${ws}true"
    local -r users_re="\"userCount\"${ws}:${ws}([0-9]+)"
    local -r categories_re="\"categoryCount\"${ws}:${ws}([0-9]+)"
    local response user_count="" category_count=""

    # --max-time keeps a hung backend from blocking the monitor. A curl
    # failure is not fatal here: the verdict comes from the body check below.
    response=$(curl -s --max-time "${CURL_MAX_TIME:-10}" "${BACKEND_URL}/api/test/db-info") || true

    if [[ "$response" =~ $ok_re ]]; then
        log "✅ 数据库连接正常"

        # Pull the counters out of the response; they stay empty when absent
        if [[ "$response" =~ $users_re ]]; then
            user_count=${BASH_REMATCH[1]}
        fi
        if [[ "$response" =~ $categories_re ]]; then
            category_count=${BASH_REMATCH[1]}
        fi

        log "📊 用户数量: $user_count"
        log "📊 分类数量: $category_count"

        return 0
    fi

    log "❌ 数据库连接失败"
    return 1
}

# Time one request to each listed API path and log the elapsed milliseconds,
# with an extra warning when a request takes longer than 1000 ms.
# Globals:   BACKEND_URL (read)
#            CURL_MAX_TIME (read, optional) - overall time limit in seconds
#            per request; defaults to 10
# Note:      timing relies on `date +%s%N` (nanosecond timestamps).
check_api_performance() {
    log "⚡ 检查API性能..."

    local apis=(
        "/api/services/categories"
        "/api/merchants"
        "/api/services"
    )
    local -r slow_ms=1000
    local api start_time end_time response_time

    for api in "${apis[@]}"; do
        start_time=$(date +%s%N)
        # --max-time bounds each request so that one hung endpoint is logged
        # as a failed request instead of blocking the remaining checks.
        if curl -f -s --max-time "${CURL_MAX_TIME:-10}" "${BACKEND_URL}${api}" > /dev/null 2>&1; then
            end_time=$(date +%s%N)
            # nanoseconds -> milliseconds
            response_time=$(( (end_time - start_time) / 1000000 ))
            log "📊 $api 响应时间: ${response_time}ms"

            if (( response_time > slow_ms )); then
                log "⚠️ $api 响应时间过长: ${response_time}ms"
            fi
        else
            log "❌ $api 请求失败"
        fi
    done
}

# Raise an alert (extension point). At present the alert is only written
# through log().
# Arguments: $1 - alert text
send_alert() {
    local alert_text="$1"

    log "🚨 告警: $alert_text"

    # Mail, SMS, DingTalk or other notification channels can be hooked in here.
    # For example:
    # curl -X POST "https://oapi.dingtalk.com/robot/send?access_token=YOUR_TOKEN" \
    #      -H "Content-Type: application/json" \
    #      -d "{\"msgtype\":\"text\",\"text\":{\"content\":\"$alert_text\"}}"
}

# Main monitoring loop: run all checks, log a summary, sleep, repeat forever.
# A failing service, container or database check raises an alert and marks
# the round as unhealthy; the resource and API-performance checks only log.
# Globals: BACKEND_URL, CHECK_INTERVAL (read)
monitor_loop() {
    log "🚀 开始监控洗护系统..."

    local healthy
    while :; do
        log "🔍 执行健康检查..."
        healthy=yes

        # Health checks: each failure flips the flag and sends its own alert
        check_service "后端服务" "${BACKEND_URL}/api/test/db-info" || {
            healthy=no
            send_alert "后端服务异常"
        }

        check_containers || {
            healthy=no
            send_alert "Docker容器异常"
        }

        check_database || {
            healthy=no
            send_alert "数据库连接异常"
        }

        # Informational checks: their result does not affect the summary
        check_resources
        check_api_performance

        case "$healthy" in
            yes) log "🎉 系统运行正常" ;;
            *) log "⚠️ 系统存在异常，请检查" ;;
        esac

        log "⏰ 等待 $CHECK_INTERVAL 秒后进行下次检查..."
        sleep "$CHECK_INTERVAL"
    done
}

# Print the usage text to stdout.
# Outputs: script title, the invocation line built from $0, the supported
#          options (--once, --help) and the default behaviour
show_help() {
    printf '%s\n' \
        "洗护系统监控脚本" \
        "" \
        "用法: $0 [选项]" \
        "" \
        "选项:" \
        "  --once      执行一次检查后退出" \
        "  --help      显示此帮助信息" \
        "" \
        "默认行为: 持续监控系统状态"
}

# Entry point: dispatch on the first command-line argument.
#   --help   print usage and exit with status 0
#   --once   run every check a single time
#   (other)  start the endless monitoring loop
# Arguments: $1 - optional mode flag
# Returns:   for --once, 0 when every check succeeded and 1 otherwise
main() {
    case "${1:-}" in
        --help)
            show_help
            exit 0
            ;;
        --once)
            log "🔍 执行单次健康检查..."

            # Each call is guarded with `||` so that, with errexit active, a
            # failing check does not abort the script before the remaining
            # checks have run; failures are collected and reported through
            # the return status instead.
            local failed=0
            check_service "后端服务" "${BACKEND_URL}/api/test/db-info" || failed=1
            check_containers || failed=1
            check_database || failed=1
            check_resources || failed=1
            check_api_performance || failed=1

            log "✅ 单次检查完成"
            return "$failed"
            ;;
        *)
            monitor_loop
            ;;
    esac
}

# Signal handling: on INT (Ctrl-C) or TERM, log a shutdown message and exit
# with status 0
trap 'log "🛑 监控脚本已停止"; exit 0' INT TERM

# Run main with all command-line arguments
main "$@"
